import glob
import itertools
import re

# Maps a single character to the list of pinyin readings found for it in
# 'py-tbl.txt' (one entry per table line that mentions the character).
dic = {}

# Each usable table line has exactly two whitespace-separated fields: a pinyin
# string followed by a run of characters that share that reading.
with open('py-tbl.txt', 'r', encoding='utf-8') as f:
	for line in f:
		try:
			py, all_hz = line.split()
		except ValueError:
			# Blank line or a line without exactly two fields: ignore it.
			continue
		# Hyphens are stripped from the reading before it is stored.
		py = py.replace('-', '')
		for hz in all_hz:
			dic.setdefault(hz, []).append(py)

def get_pinyin(hz_str):
	"""Return every pinyin spelling of ``hz_str`` as a list of strings.

	Each character is looked up in the module-level ``dic`` table; a
	character with several readings multiplies the number of results.
	Within one spelling the syllables are joined with an apostrophe
	(e.g. ``"che'ma"``).  Results are ordered so that the reading of the
	first character varies slowest and that of the last character fastest.

	An empty ``hz_str`` yields ``['']``.

	Raises:
		KeyError: if a character of ``hz_str`` is not present in ``dic``.
	"""
	# dic[hz] (rather than dic.get) makes an unknown character fail with a
	# KeyError naming that character instead of an opaque TypeError.
	readings = [dic[hz] for hz in hz_str]
	return ["'".join(combo) for combo in itertools.product(*readings)]

#print(get_pinyin('车马炮藏药'))

# Convert every THUOCL word list in the current directory.
# Words with exactly one pinyin spelling are written to out1.txt, words with
# several spellings to out2.txt; each output line is "<word> <pinyin> 5".
# The `with` blocks guarantee that all files are closed on every exit path.
with open('out1.txt', 'w', encoding='utf-8') as outf1, \
		open('out2.txt', 'w', encoding='utf-8') as outf2:
	for file_name in glob.glob('THUOCL_*.txt'):
		print(file_name)	# progress indicator
		with open(file_name, 'r', encoding='utf-8') as f:
			for line in f:
				try:
					# The second field is required to be present but is not used.
					word, _cnt = line.split()
				except ValueError:
					# A line without exactly two fields aborts the whole run;
					# exit with a non-zero status so the failure is visible.
					print('错误：', file_name, line)
					raise SystemExit(1)
				# Only words of 2 to 5 characters are converted.
				if len(word) == 1 or len(word) > 5:
					continue
				try:
					pinyin = get_pinyin(word)
				except (KeyError, TypeError):
					# The word contains a character missing from the pinyin
					# table, so the lookup failed: skip the word.
					continue
				outf = outf2 if len(pinyin) > 1 else outf1
				for py in pinyin:
					print(word, py, '5', file=outf)
